Note: Open scripts.Rproj first, then script. To easily use relative paths, click the down button next to knit and then click “Knit Directory –> Project Directory”. This should make loading and saving files much easier.
Perform GO Semantic Similarity analysis to gain additional insights
into our DGE enriched GO terms, using results from DGE analysis (i.e.,
after running the 1a-DGE.Rmd script). Red text indicates regions that require the
user to modify. Regardless, the user should check over all code blocks
to ensure that everything is running correctly.
Load packages
library(tidyverse)
library(org.Hs.eg.db)
library(simplifyEnrichment)
library(magick)
Change file names and conditions where appropriate.
# Input unfiltered data
GO_term.file <- "salmon.numreads.DGE_results.GOsig.tsv"
Load GO data and select only the GO_IDs associated with BP, MF, and CC terms.
GO <- read.csv(GO_term.file, header=TRUE, sep="\t")
# Print stats
print(paste("Number sig BP terms: ", nrow(filter(GO, ontology=="BP")), sep=''))
## [1] "Number sig BP terms: 53"
print(paste("Number sig MF terms: ", nrow(filter(GO, ontology=="MF")), sep=''))
## [1] "Number sig MF terms: 10"
print(paste("Number sig CC terms: ", nrow(filter(GO, ontology=="CC")), sep=''))
## [1] "Number sig CC terms: 13"
# Extract BP, MF, and CC separatly
GO.BP <- GO %>% filter(ontology == "BP")
BP <- GO.BP$category
GO.MF <- GO %>% filter(ontology == "MF")
MF <- GO.MF$category
GO.CC <- GO %>% filter(ontology == "CC")
CC <- GO.CC$category
simplifyGO(GO_similarity(BP, ont = "BP", db = "org.Hs.eg.db"),
word_cloud_grob_param = list(max_width = 50),
max_words=20
)
## Cluster 53 terms by 'binary_cut'... 22 clusters, used 0.04145217 secs.
## Perform keywords enrichment for 22 GO lists...
simplifyGO(GO_similarity(MF, ont = "MF", db = "org.Hs.eg.db"),
word_cloud_grob_param = list(max_width = 50),
max_words=20
)
## Cluster 10 terms by 'binary_cut'... 6 clusters, used 0.004768848 secs.
## Perform keywords enrichment for 6 GO lists...
simplifyGO(GO_similarity(CC, ont = "CC", db = "org.Hs.eg.db"),
word_cloud_grob_param = list(max_width = 50),
max_words=20
)
## Cluster 13 terms by 'binary_cut'... 10 clusters, used 0.006582022 secs.
## Perform keywords enrichment for 10 GO lists...
GO.BP %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=3, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Adult") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank(),#Set the plot background
legend.position="none")
GO.MF %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=3, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Adult") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank(),#Set the plot background
legend.position="none")
GO.CC %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=3, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Adult") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank(),#Set the plot background
legend.position="none")